{ "cells": [ { "cell_type": "markdown", "id": "f0ad9c98", "metadata": {}, "source": [ "# Machine Learning example" ] }, { "cell_type": "markdown", "id": "b1f70d12", "metadata": {}, "source": [ "This is fun! This is fun!" ] }, { "cell_type": "code", "execution_count": 1, "id": "5d28d5bf", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "20.25\n" ] } ], "source": [ "print(3**5/12)" ] }, { "cell_type": "code", "execution_count": 2, "id": "8363cb57", "metadata": {}, "outputs": [], "source": [ "#loading libraries\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "from sklearn import model_selection\n", "from sklearn.metrics import classification_report\n", "from sklearn.metrics import confusion_matrix\n", "from sklearn.metrics import accuracy_score\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.tree import DecisionTreeClassifier\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n", "from sklearn.naive_bayes import GaussianNB\n", "from sklearn.svm import SVC\n" ] }, { "cell_type": "code", "execution_count": 3, "id": "78d6cfd8", "metadata": {}, "outputs": [], "source": [ "url = \"https://raw.githubusercontent.com/jbrownlee/Datasets/master/iris.csv\"\n", "names1 = ['sepal-length','sepal-width','petal-length','petal-width','class']\n", "dataB = pd.read_csv(url,names=names1)\n" ] }, { "cell_type": "code", "execution_count": 4, "id": "fd9cd962", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " sepal-length sepal-width petal-length petal-width class\n", "0 5.1 3.5 1.4 0.2 Iris-setosa\n", "1 4.9 3.0 1.4 0.2 Iris-setosa\n", "2 4.7 3.2 1.3 0.2 Iris-setosa\n", "3 4.6 3.1 1.5 0.2 Iris-setosa\n", "4 5.0 3.6 1.4 0.2 Iris-setosa\n", "5 5.4 3.9 1.7 0.4 Iris-setosa\n", "6 4.6 3.4 1.4 0.3 Iris-setosa\n", "7 5.0 3.4 1.5 0.2 Iris-setosa\n", "8 4.4 2.9 1.4 0.2 Iris-setosa\n", "9 4.9 3.1 1.5 0.1 Iris-setosa\n", "10 5.4 3.7 1.5 0.2 Iris-setosa\n", "11 4.8 3.4 1.6 0.2 Iris-setosa\n", "12 4.8 3.0 1.4 0.1 Iris-setosa\n", "13 4.3 3.0 1.1 0.1 Iris-setosa\n", "14 5.8 4.0 1.2 0.2 Iris-setosa\n", "15 5.7 4.4 1.5 0.4 Iris-setosa\n", "16 5.4 3.9 1.3 0.4 Iris-setosa\n", "17 5.1 3.5 1.4 0.3 Iris-setosa\n", "18 5.7 3.8 1.7 0.3 Iris-setosa\n", "19 5.1 3.8 1.5 0.3 Iris-setosa\n" ] } ], "source": [ "# dataB has the iris database that we will analyze. The names\n", "# above are the column names\n", "print(dataB.head(20))" ] }, { "cell_type": "code", "execution_count": 5, "id": "c5d59fff", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " sepal-length sepal-width petal-length petal-width\n", "count 150.000000 150.000000 150.000000 150.000000\n", "mean 5.843333 3.054000 3.758667 1.198667\n", "std 0.828066 0.433594 1.764420 0.763161\n", "min 4.300000 2.000000 1.000000 0.100000\n", "25% 5.100000 2.800000 1.600000 0.300000\n", "50% 5.800000 3.000000 4.350000 1.300000\n", "75% 6.400000 3.300000 5.100000 1.800000\n", "max 7.900000 4.400000 6.900000 2.500000\n" ] } ], "source": [ "print(dataB.describe())" ] }, { "cell_type": "code", "execution_count": 6, "id": "eab3f719", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 150 entries, 0 to 149\n", "Data columns (total 5 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 sepal-length 150 non-null float64\n", " 1 sepal-width 150 non-null float64\n", " 2 petal-length 150 non-null float64\n", " 3 petal-width 150 non-null float64\n", " 4 class 150 non-null object \n", "dtypes: float64(4), object(1)\n", "memory usage: 6.0+ KB\n", "None\n" ] } ], "source": [ "print(dataB.info())" ] }, { "cell_type": "code", "execution_count": 7, "id": "8f01c318", "metadata": {}, "outputs": [], "source": [ "# split the data into the numerical inputs (x values)\n", "# and the output (y values - classifications)\n", "array = dataB.values\n", "X = array[:,0:4]\n", "Y = array[:,4]" ] }, { "cell_type": "code", "execution_count": 40, "id": "f9db27cd", "metadata": {}, "outputs": [], "source": [ "validation_ratio = 0.40" ] }, { "cell_type": "code", "execution_count": 9, "id": "a5f6568c", "metadata": {}, "outputs": [], "source": [ "# this is the percent of the data that I will use as my validation data,\n", "# what's left is my training data." ] }, { "cell_type": "code", "execution_count": 41, "id": "7e4fad5c", "metadata": {}, "outputs": [], "source": [ "X_train, X_valid, Y_train, Y_valid =\\\n", " model_selection.train_test_split(X,Y,\\\n", " test_size=validation_ratio)\n", "scoring=\"accuracy\"" ] }, { "cell_type": "code", "execution_count": 11, "id": "ff03a41e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(90, 60)" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(X_train),len(X_valid)" ] }, { "cell_type": "code", "execution_count": 14, "id": "e6b2a876", "metadata": {}, "outputs": [], "source": [ "# algorithms, stored in an list with a shortcut name\n", "models = []\n", "models.append(('LR',LogisticRegression(solver=\\\n", " 'liblinear',multi_class='ovr')))\n", "models.append(('LDA',LinearDiscriminantAnalysis()))\n", "models.append(('KNN',KNeighborsClassifier()))\n", "models.append(('CART',DecisionTreeClassifier()))\n", "models.append(('NB',GaussianNB()))\n", "models.append(('SVM',SVC(gamma='auto')))" ] }, { "cell_type": "code", "execution_count": 42, "id": "0c48943e", "metadata": {}, "outputs": [], "source": [ "# arrays to show our results of testing out our algorithms\n", "results = []\n", "names = []" ] }, { "cell_type": "code", "execution_count": 43, "id": "85528125", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "LR: 0.933, 0.038\n", "LDA: 0.989, 0.025\n", "KNN: 0.956, 0.031\n", "CART: 0.967, 0.033\n", "NB: 0.944, 0.046\n", "SVM: 0.978, 0.031\n" ] } ], "source": [ "from warnings import simplefilter\n", "simplefilter(action='ignore', category=FutureWarning)\n", "for name,model in models:\n", " kfold = model_selection.KFold(n_splits=6)\n", " # we will be splitting our training set into 6 pieces\n", " cv_results = model_selection.cross_val_score(model,X_train,\\\n", " Y_train,cv=kfold)\n", " results += [cv_results]\n", " names += [name]\n", " print(name+\": \"+str(round(cv_results.mean(),3))+\", \"\\\n", " +str(round(cv_results.std(),3)))\n", "\n", " " ] }, { "cell_type": "code", "execution_count": 39, "id": "e3d4b873", "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig = plt.figure()\n", "fig.suptitle('Algorithm Comparison')\n", "ax = fig.add_subplot(111)\n", "plt.boxplot(results)\n", "ax.set_xticklabels(names)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 48, "id": "6a40f82d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'algorithm': 'auto', 'leaf_size': 30, 'metric': 'minkowski', 'metric_params': None, 'n_jobs': None, 'n_neighbors': 5, 'p': 2, 'weights': 'uniform'}\n", "0.95\n", "[[19 0 0]\n", " [ 0 19 2]\n", " [ 0 1 19]]\n", " precision recall f1-score support\n", "\n", " Iris-setosa 1.00 1.00 1.00 19\n", "Iris-versicolor 0.95 0.90 0.93 21\n", " Iris-virginica 0.90 0.95 0.93 20\n", "\n", " accuracy 0.95 60\n", " macro avg 0.95 0.95 0.95 60\n", " weighted avg 0.95 0.95 0.95 60\n", "\n" ] } ], "source": [ "# more specific data about one algorithm - K Nearest Neighbors\n", "alg = KNeighborsClassifier()\n", "print(alg.get_params())\n", "alg.fit(X_train,Y_train)\n", "predictions = alg.predict(X_valid)\n", "print(accuracy_score(Y_valid,predictions))\n", "print(confusion_matrix(Y_valid,predictions))\n", "print(classification_report(Y_valid,predictions))\n" ] }, { "cell_type": "code", "execution_count": null, "id": "c2b251f6", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.13" } }, "nbformat": 4, "nbformat_minor": 5 }